Figure 7 and supplementary figure 7

Author

Niccolò Arecco

1 Intro

Last code execution: 2024 February 09, Friday @ 13:02:08.

Write a concise introduction.

Code
sshfs narecco@ant-login.linux.crg.es:/users/mirimia ~/mnt

2 Set Up

2.1 Packages

Load packages required for the analysis and suppress any message. Check the Section 5 section at the end for package versions.

Code
library(dplyr, warn.conflicts = F, quietly = T)
library(readr)
library(stringr)
library(tidyr)
library(tibble)
library(forcats)
library(stringr)
library(ggplot2)
library(ggrastr) # for rasterizing only the geom_point layer in a plot with many data-points
library(ggforce)
library(ggsignif)
library(patchwork)
library(rstatix)
library(org.Mm.eg.db)
library(clusterProfiler)
library(pheatmap)
library(DT)
library(UpSetR)

Load my R package niar to use some custom-made functions

Code
if ( ('niar' %in% .packages(all.available = TRUE)) == TRUE ) {
  library('niar')
} else if ( ('niar' %in% .packages(all.available = TRUE)) == FALSE ){
  message("The package niar is not installed so I'm trying to load it from ",
          "the local repository of the package")
  if ( dir.exists('~/mnt/narecco/software/R/niar' ) )  {
    devtools::load_all(path = '~/mnt/narecco/software/R/niar')
  } else {
    stop("Can't find the local repo of the niar package! ",
         "You must install it with:\n",
         "devtools::install_github('Ni-Ar/niar') ")
  }
} else{
  stop("Can't understand if 'niar' package was installed beforehand")
}
The package niar is not installed so I'm trying to load it from the local repository of the package
ℹ Loading niar

2.2 Functions

Custom-made functions.

Code
import_res <- function(file_path, signif_thrshld = 0.05, invert_l2FC = F ) {
  df <- read_delim(file = file_path, delim = '\t', show_col_types = F)

  # annotate res
  df |>
    mutate(signif = case_when(padj <= signif_thrshld ~ TRUE,
                              padj > signif_thrshld ~ FALSE) ) -> df
    
  if (invert_l2FC == TRUE) {
    df <-  df |> mutate(log2FoldChange = -log2FoldChange)
  }
    df |>
    mutate(direction = case_when(signif == TRUE & log2FoldChange >= 0 ~ 'UP',
                                 signif == TRUE & log2FoldChange < 0 ~ 'DOWN',
                                 signif == FALSE | is.na(padj) ~ 'NONE') ) |>
    # order point for plotting
    mutate(direction = factor(direction, levels = c('NONE', 'UP', 'DOWN'))) |>
    arrange(direction) |> # baseMean
    # filter out missing values for the plot
    subset(!is.na(log2FoldChange)) |>
    subset(!is.na(baseMean)) -> res
    return(res)
}

Import RPKMs from Enrique’s analysis

Code
import_rpkms <- function(base_path, file_name, sample_order) {
  # path
  rpkm_path <- file.path(base_path, file_name)
  stopifnot(file.exists(rpkm_path))
  # read and reshape
  read_delim(file = rpkm_path, delim = "\t", show_col_types = FALSE, 
             col_names = c('external_gene_name', sample_order)) |>
    pivot_longer(cols = -("external_gene_name"), names_to = "Pretty_Sample", 
                 values_to = "RPKM") -> df_rpkm
  return(df_rpkm)
}

Helper function required to plot number of DEGs

Code
summarise_DEGs <- function(file_path, sample_name_contrast, signif_thrshld = 0.05, ... ) {
  # import file
  anno_df <- import_res(file_path, signif_thrshld, ...) |>
    mutate(contrast = paste0(sample_name_contrast, '_vs_WT') )
  
  # count how many DEG per condition.
  anno_df |>
    count(direction, contrast) |>
    complete(direction, contrast, fill = list(n = 0) ) |>
    setNames(c('direction', 'contrast', 'Num_genes')) -> summary_df
  return(summary_df)
}

Run GO from file with gene names.

Code
run_enrichGO_UP_DOWN <- function(up_genes_path, do_genes_path, sample_name_contrast) {
  UP_genes <- read.table(up_genes_path)[,1]
  message('Number of ', sample_name_contrast, 
          ' up-regulated genes entering the GO term analysis: ',
          length(UP_genes))
  
  res <- enrichGO(gene          = UP_genes,
                  OrgDb         = org.Mm.eg.db,
                  keyType       = 'SYMBOL',
                  ont           = "ALL",
                  pAdjustMethod = "BH",
                  pvalueCutoff  = 0.05,
                  qvalueCutoff  = 0.1)
  
  GO_UP <- as_tibble(res) |>
    mutate(Sample = sample_name_contrast, 
           Direction = 'UP',
           .before = ONTOLOGY)
  
  DO_genes <- read.table(do_genes_path)[,1]
  message('Number of ', sample_name_contrast, 
          ' down-regulated genes entering the GO term analysis: ',
          length(DO_genes))
  
  res <- enrichGO(gene          = DO_genes,
                  OrgDb         = org.Mm.eg.db,
                  keyType       = 'SYMBOL',
                  ont           = "ALL",
                  pAdjustMethod = "BH",
                  pvalueCutoff  = 0.05,
                  qvalueCutoff  = 0.1)
  
  GO_DO <- as_tibble(res) |>
    mutate(Sample = sample_name_contrast, 
           Direction = 'DOWN',
           .before = ONTOLOGY)
  return(rbind(GO_UP, GO_DO))
}

Ggplot plotting function.

Code
plot_MA_fig <- function(file_path, signif_thrshld = 0.05, sample_name_contrast,  
                        Y_lim = c(-8, 8), Panel_Num, label = FALSE, ...) {

  anno_df <- import_res(file_path, signif_thrshld, ...) 

  ggplot(anno_df) +
    aes(x = log2(baseMean), y = log2FoldChange, fill = direction) +
    geom_point(shape = 21, stroke = 0.05, size = 0.65) +
    geom_hline(yintercept = 0, linewidth = 0.2, linetype = 'solid', colour = 'black' ) +
    scale_x_continuous(n.breaks = 6, 
                       expand = expansion(mult = c(0.01, 0), add = c(0.02, 0)) ) +
    scale_y_continuous(limits = Y_lim, oob = scales::squish, n.breaks = 10,
                       expand = expansion(mult = 0, add = 0.02 ) ) +
    scale_fill_manual(values = c('UP' = '#E63945', 
                                 'DOWN' = '#1D3557', 'NONE' = 'gray84') )  +
    labs(x = expression("Average expression (" ~ log[2] ~ ")" ), 
         y = paste0("log2 fold change ", sample_name_contrast, "/WT") ) +
    coord_cartesian(xlim = c(-3, 20), clip = 'on') +
    theme_classic(base_family = "Arial", base_size = 6) +
    theme(axis.text = element_text(colour = 'black'),
          axis.title = element_text(size = 5, colour = 'black'),
          axis.title.y = element_text(margin = margin(r = -0.2, unit = 'pt')),
          axis.title.x = element_text(margin = margin(t = -0.5, unit = 'pt')),
          axis.line = element_line(linewidth = 0.15),
          axis.ticks = element_line(colour = 'black', linewidth = 0.15),
          axis.ticks.length = unit(1, units = 'mm'),
          legend.position = 'none',
          legend.title = element_blank(),
          panel.grid.major = element_line(colour = 'gray90', linewidth = 0.075),
          panel.background = element_blank(),
          panel.border = element_blank(),
          plot.background = element_blank()) -> p_MA
  
  if(label == TRUE) {
    p_MA <- p_MA +
      geom_text_repel(data = subset(anno_df, direction == 'UP'), 
                      aes(label = external_gene_name), nudge_x = 0.2, nudge_y = 0.2, 
                      direction = 'x', force_pull = 2, segment.curvature = -1e-20,
                      size = 1, segment.size = unit(0.1, units = 'mm')) +
      geom_text_repel(data = subset(anno_df, direction == 'DOWN'), 
                      aes(label = external_gene_name), nudge_x = 0.2, nudge_y = -0.2,
                      direction = 'x', force_pull = 2, segment.curvature = -1e-20,
                      size = 1, segment.size = unit(0.1, units = 'mm'))  
  }
  
  # save all plot as vectorial
  ggsave(path = pdf_dir_fig, plot = p_MA, device = cairo_pdf, units = "cm",
         filename = paste0("Fig", Panel_Num, "_MA_plot_", sample_name_contrast,
                           "_vs_WT_vectorial_points", ".pdf"),
         width = 4.0, height = 3.0)
  
  # save all plot as vectorial, except for points as rasterized image at 400 dpi.
  r_MA <- rasterize(p_MA, layers = 'Point', dpi = 400)
  
  ggsave(path = pdf_dir_fig, plot = r_MA, device = cairo_pdf, units = "cm",
         filename = paste0("Fig", Panel_Num, "_MA_plot_", sample_name_contrast,
                           "_vs_WT_rasterized_points", ".pdf"),
         width = 4.0, height = 3.0)
  r_MA
}

Combine GO terms heatmap and upset plot into one.

Code
# Function to check gene presence. This should be generalised to any Sample name.
check_gene_presence <- function(gene, df) {
  data.frame(
    Gene = gene,
    KO = gene %in% df$Gene[df$Sample == "KO"],
    KOrL = gene %in% df$Gene[df$Sample == "KOrL"],
    KOrS = gene %in% df$Gene[df$Sample == "KOrS"]
  )
}
# lgnd_mm = c(width, height) in mm
plot_heatmap_upset <- function(df, k_text = 20, 
                               htmp_min_col = 'white', 
                               htmp_max_col = '#E63945',
                               bar_txt_size = 1.5,
                               htmp_txt_size = 2,
                               lgnd_mm = c(5, 2),
                               patch_rel_heights = c(5, 0.45),
                               isect_point_size = 1.25,
                               bar_num_nudge = 7, 
                               GO_txt_width = 20) {
  
  # order GO IDs description as factor and wrap ----------------------
  df <- df |>
    mutate(Description = str_wrap(Description, width = GO_txt_width) ) |>
    mutate(Description = fct_reorder(Description, 
                                     -log10(p.adjust), .desc = F))
  GO_description_lvl <- levels(df$Description)
  
  # Plot heatmap of GO terms ----------------------
  ggplot(df, aes(x = Sample, y = Description, 
                 fill = -log10(p.adjust) ) )  +
    geom_tile() +
    geom_text(aes(label = Count), size = htmp_txt_size ) +
    scale_fill_gradient(low = htmp_min_col, high = htmp_max_col)  +
    coord_cartesian(expand = F, clip = 'off') +
    theme_classic(base_size = 6, base_family = 'Arial') +
    guides(fill = guide_colourbar(title.position = "bottom") )+
    theme(legend.position = 'bottom',
          legend.direction = "horizontal", 
          legend.background = element_blank(),
          legend.box.background = element_blank(),
          legend.text = element_text(vjust = 1, margin = margin(t = 0, b = 0)),
          legend.title.align = 0.5,
          legend.title = element_text(vjust = 1, size = 5),
          legend.margin = margin(t = -0.5, unit = 'mm'),
          legend.key.width = unit(lgnd_mm[1], units = 'mm'),
          legend.key.height = unit(lgnd_mm[2], units = 'mm'),
          axis.text = element_text(colour = 'black'),
          axis.text.x = element_text(margin = margin(t = -0.2, b = -1, unit = 'mm')),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          strip.background = element_blank(),
          panel.background = element_blank(),
          panel.grid = element_blank(),
          plot.background = element_blank()) -> p_hm
  
  # Extract genes from df ----------------------
  df |> 
    select(Sample, ID, Description, geneID) |>
    unique() |>
    group_by(Sample, ID) |>
    # turn character with / separator into list
    mutate(genes_list = list(strsplit(geneID, "/")[[1]]), 
           .before = ID) |>
    ungroup() |>
    # Rename samples
    mutate(Sample = gsub('\\+', 'r', Sample) ) |>
    unnest_wider(genes_list, names_sep = '_g') |>
    pivot_longer(cols = starts_with('genes_list_g'), 
                 values_to = 'Gene') |>
    select(-c(name, geneID)) |>
    # NA introduced for un-nesting
    subset(!is.na(Gene)) |>
    group_by(ID) -> df_long
  
  # get all unique genes
  unique_genes <- unique(df_long$Gene)
  unique_IDs <- unique(df_long$ID)
  unique_descript <- unique(df_long$Description)
  
  # Check in each GO term how many genes there are per sample ----------------------
  df_occurance <- list()
  
  for (g in 1:length(unique_IDs)) {
    df_go <- subset(df_long, ID == unique_IDs[g])
    result_list <-  lapply(unique_genes, check_gene_presence, df = df_go)
    
    # Combine the results into a single data frame
    df_result <- as_tibble(do.call(rbind, result_list)) |>
      mutate(ID = unique_IDs[g], .before = Gene)
    
    df_occurance[[g]] <- df_result
  }
  
  do.call(rbind, df_occurance) |> 
    as_tibble() |>
    group_by(ID) |>
    select(-Gene) |>
    mutate(across(where(is.logical), as.integer ) ) |>
    # get occurrence of all combinations
    table() |>
    as.data.frame() |>
    as_tibble() |> 
    mutate(ID = as.character(ID)) -> df2
  
  # Prepare data for bar plot ----------------------
  df2 |>
    # if 1 turn into column name, if zero turn into empty string
    mutate(across(where(is.factor), 
                  .fns = ~ ifelse(.x == 1, yes = cur_column(), 
                                  no = '*'))) |>
    tidyr::unite(col = 'Isect', !contains(c('Freq', 'ID') ) ) |>
    # remove empty intersection
    subset(Isect != '*_*_*' ) |>
    mutate(Isect = fct_reorder(Isect, Freq, .desc = T)) -> df3
  
  # Add description as factor  ----------------------
  df |>
    select(ID, Description, ONTOLOGY) |>
    unique() |>
    left_join(df3, by = join_by(ID)) |>
    mutate(Description = factor(Description, 
                                levels = rev(GO_description_lvl))
           ) -> df4
  
  # Plot bars ----------------------
  ggplot(df4) +
    aes(x = Isect, y = Freq) +
    facet_wrap(~Description, ncol = 1, strip.position = 'right') +
    geom_col(width = 0.75) +
    geom_text(data = subset(df4, Freq >= k_text), size = bar_txt_size,
              aes(label = Freq), nudge_y = -bar_num_nudge,
              colour = 'white') +
    geom_text(data = subset(df4, Freq < k_text), size = bar_txt_size,
              aes(label = Freq), nudge_y = bar_num_nudge,
              colour = 'black') +
    scale_x_discrete(expand = expansion(mult = 0, add = 0)) +
    scale_y_continuous(expand = expansion(mult = c(0, 0.01), add = 0),
                       n.breaks = 3) +
    theme_classic(base_size = 6, base_family = 'Arial') +
    theme(axis.text.x = element_blank(),
          axis.title = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text = element_text(colour = 'black'),
          axis.ticks.length.y = unit(0.5, units = 'mm'),
          strip.background = element_blank(),
          panel.grid.major.y = element_line(colour = 'gray90', 
                                            linewidth = 0.15),
          panel.background = element_blank(),
          panel.border = element_blank(),
          plot.background = element_blank()) -> p_bar
  
  # Prepare data for intersection points   ----------------------
  df2 |>
    # if 1 turn into column name, if zero turn into empty string
    mutate(across(where(is.factor), 
                  .fns = ~ ifelse(.x == 1, yes = cur_column(),
                                  no = '*'))) |>
    tidyr::unite(col = 'Isect', !contains(c('Freq', 'ID') ), 
                 remove = F ) |>
    select(-c(ID, Freq)) |>
    unique() |>
    subset(Isect != '*_*_*' ) |>
    pivot_longer(cols = !starts_with('Isect'), 
                 names_to = 'Sample', values_to = 'Fill') |>
    mutate(Fill = ifelse(Fill == '*', yes = 0L, no = 1L)) |>
    group_by(Isect) |>
    mutate(Num_zeros = sum(Fill)) |>
    ungroup() |>
    # add factors of the bar plot
    mutate(Isect = factor(Isect, level = levels(df4$Isect), 
                          ordered = T ) ) |>
    mutate(Sample = gsub('r', '\\+', Sample) ) -> df5

  # use drop on X-axis see:
  # https://github.com/tidyverse/ggplot2/issues/3197
  df5$Sample <- factor(df5$Sample, levels = rev(unique(df5$Sample)) )
  
  sample_highlight <- which(as.integer(unique(df5$Sample)) %% 2 == 0)
  data.frame(Sample = sample_highlight, 
             start = min(df5$Isect), 
             end = max(df5$Isect)) -> df_highlight
  
  # Plot isect points  ----------------------
  ggplot(df5) +
    aes(x = Isect, y = Sample, fill = factor(Fill), group = Isect ) +
    annotate("rect", xmin = -Inf, xmax = Inf, 
             ymin = sample_highlight-0.25, ymax = sample_highlight+0.25,
             fill = "gray50", alpha = 0.5) +
    geom_path(data = subset(df5, Num_zeros > 1 & Fill > 0 ),
              linewidth = 0.2, colour = 'black', linetype = 'solid') +
    geom_point(shape = 21, size = isect_point_size, 
               show.legend = F, stroke = 0.15) +
    scale_fill_manual(values = c('1' = 'black', '0' = 'white')) +
    scale_x_discrete(expand = expansion(mult =  0.05, add = 0), 
                     drop = FALSE ) +
    scale_y_discrete(expand = expansion(mult = c(0, 0.1), add = 0.) ) +
    coord_cartesian(clip =  'off') +
    labs(x = 'Intersections') +
    theme_classic(base_size = 6, base_family = 'Arial') +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_text(colour = 'black'),
          axis.title.x = element_text(size = 5),
          axis.title.y = element_blank(),
          strip.background = element_blank(),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          panel.grid = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(), 
          plot.background = element_blank()) -> p_isect
  
  # Combine all into one plot
  # layout <- c(
  #   area(t = 1, b = 50, l =  1, r = 1),
  #   area(t = 1, b = 50, l =  2, r = 2),
  #   area(t = 50, b = 52, l =  2, r = 2)
  # )
  layout <- "
  AB
  CD
  "
  wrap_plots(A = p_hm, B = p_bar, 
             C = guide_area(), D = p_isect, 
             design = layout, guides = 'collect', 
             heights = patch_rel_heights) -> p_final
  return(p_final)
}

Ggplot2 themes.

Code
theme_classic(base_size = 6, base_family = 'Arial') +
  theme(axis.title = element_text(colour = 'black', size = 5),
        axis.title.y = element_text(margin = margin(r = 0, unit = 'mm')),
        axis.title.x = element_blank(),
        axis.line = element_line(linewidth = 0.15),
        axis.ticks.length = unit(1, units = 'mm'),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_line(colour = 'black', linewidth = 0.15),
        axis.text = element_text(colour = 'black', size = 6.05),
        axis.text.x = element_text(colour = 'black', margin = margin(t = -0.25, unit = 'mm'), vjust = 1),
        legend.position = 'none', 
        panel.grid.major.y = element_line(colour = 'gray90', linewidth = 0.075),
        panel.background = element_blank(),
        panel.border = element_blank(),
        plot.background = element_blank()) -> th_neuro
Code
theme_classic(base_size = 6, base_family = 'Arial') +
    theme(axis.text.y = element_blank(),
          axis.title = element_text(colour = 'black', size = 5),
          axis.title.y = element_text(margin = margin(r = -1, unit = 'mm')),
          axis.title.x = element_text(margin = margin(t = -0.2, unit = 'mm')),
          axis.line = element_line(linewidth = 0.15),
          axis.ticks.length = unit(1, units = 'mm'),
          axis.ticks.x = element_line(colour = 'black', linewidth = 0.15),
          axis.ticks.y = element_blank(),
          axis.text = element_text(colour = 'black'),
          legend.position = 'none', 
          panel.grid.major.x = element_line(colour = 'gray90', linewidth = 0.075),
          panel.background = element_blank(),
          panel.border = element_blank(),
          plot.background = element_blank()) -> GO_terms_barplot_th
Code
theme_classic(base_family = "Arial", base_size = 5) +
  theme(axis.text = element_text(colour = 'black'),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 5),
        axis.ticks.x = element_blank(),
        axis.ticks.length = unit(1, units = 'mm'),
        axis.line = element_line(linewidth = 0.15),
        legend.position = 'none',
        legend.title = element_blank(),
        panel.grid.major.y = element_line(colour = 'gray90', linewidth = 0.075),
        panel.background = element_blank(),
        panel.border = element_blank(),
        plot.background = element_blank() ) -> DEG_counts_barplot_th

Paper’s genotype palette.

Code
genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48", 
                      'KO' = "gray", 'KO+L' = "mediumpurple2", 'KO+S' = "darkorange1",
                      'KO+L-3D' = "#74A57F", 'KO+S-3D' = "#E8B4BC")

2.3 Directories & File Paths

Here I organise all the file and directories paths I need to run the analysis and define where to save the processed tables and figures.

Code
oneDrive_Dir <- file.path("~/OneDrive - CRG - Centre de Regulacio Genomica/Suz12_AS_project")  
code_dir_fig <- file.path(oneDrive_Dir, "_Code/Fig7")
tbl_dir_fig <- file.path(code_dir_fig, "tables")
pdf_dir_fig <- file.path(code_dir_fig, "pdfs")

if (!dir.exists(pdf_dir_fig)) { dir.create(pdf_dir_fig, recursive = T) }
if (!dir.exists(tbl_dir_fig)) { dir.create(tbl_dir_fig, recursive = T) }

Path to files in dropbox

Code
dropbox_dir <- file.path('~/Dropbox (CRG ADV)/54_Suz12AS')

round6_path <- file.path(dropbox_dir, 'ROUND6_CGIs/files')
Suz12_CGI_targets_path <- file.path(round6_path, "CGI_Suz12_genes.txt")
stopifnot(file.exists(Suz12_CGI_targets_path))

round22_deseq2_dir <- file.path(dropbox_dir, 'ROUND22_RNAseq20SamplesMolCellReview/STATS')
stopifnot(dir.exists(round22_deseq2_dir))

round22_enrich_dir <- file.path(dropbox_dir, 'ROUND22_RNAseq20SamplesMolCellReview/DEGs')
stopifnot(dir.exists(round22_enrich_dir))

round22_rpkm_dir <- file.path(dropbox_dir, 'ROUND22_RNAseq20SamplesMolCellReview/RPKMs')
stopifnot(dir.exists(round22_rpkm_dir))

DESeq2 results file path. Pay attention that the contrast of WT vs KO is inverted and not like WT vs KO+L/S, so I have to invert the log2Fold Changes.

Code
KO_path_NPC   <- file.path(round22_deseq2_dir, 'A-3_KO_B-3_WT_stats_paired.txt')
KOrL_path_NPC <- file.path(round22_deseq2_dir, 'A-3_WT_B-3_KO-L_stats_paired.txt')
KOrS_path_NPC <- file.path(round22_deseq2_dir, 'A-3_WT_B-3_KO-S_stats_paired.txt')

3 Main Figure Panels

3.1 MA plot NPCs

This function save to pdf directly to ~/OneDrive - CRG - Centre de Regulacio Genomica/Suz12_AS_project/_Code/Fig7/pdfs

Plot ∆ex4 / WT MA plot (figure 7B)

Code
dEx4_path_NPC <- file.path(round22_deseq2_dir, 'A-2_WT_B-2_DELTAex4_stats_paired.txt')
set.seed(16)
plot_MA_fig(file_path = dEx4_path_NPC, sample_name_contrast = '∆ex4', Panel_Num = '7B', label = T)
Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps

Check how many genes are up- or down- regulated.

Code
import_res(dEx4_path_NPC) |>
  group_by(direction) |>
  summarise(Num_DEGs = n())
# A tibble: 3 × 2
  direction Num_DEGs
  <fct>        <int>
1 NONE         20903
2 UP               5
3 DOWN             9

Plot CSex4 / WT MA plot (figure 7C)

Code
CSx4_path_NPC <- file.path(round22_deseq2_dir, 'A-2_WT_B-2_CSex4_stats_paired.txt')
plot_MA_fig(file_path = CSx4_path_NPC, sample_name_contrast = 'CSex4', Panel_Num = '7C')

Check how many genes are up- or down- regulated.

Code
import_res(CSx4_path_NPC) |>
  group_by(direction) |>
  summarise(Num_DEGs = n())
# A tibble: 3 × 2
  direction Num_DEGs
  <fct>        <int>
1 NONE         20458
2 UP             636
3 DOWN           227

3.2 GO terms CSex4 NPCs

Import the genes that are differentially expressed in CSex4 with an adjusted P-value < 0.1

Up-regulated genes.

Code
CSex4_UP_list <- read.table(file.path(round22_enrich_dir,
                                      "A-2_WT_B-2_CSex4_UPgenelist_paired.txt"))
CSex4_UP_genes <- CSex4_UP_list[,1]
message('Num CSex4 up-regulated genes entering the GO term analysis: ', length(CSex4_UP_genes))
Num CSex4 up-regulated genes entering the GO term analysis: 786

Down-regulated genes.

Code
CSex4_DO_list <- read.table(file.path(round22_enrich_dir, "A-2_WT_B-2_CSex4_DOWNgenelist_paired.txt"))
CSex4_DO_genes <- CSex4_DO_list[,1]
message('Num CSex4 down-regulated genes entering the GO term analysis: ', length(CSex4_DO_genes))
Num CSex4 down-regulated genes entering the GO term analysis: 349

GO terms enrichment analysis using ClusterProfiler:

Up-regulated genes

Code
res <- enrichGO(gene          = CSex4_UP_genes,
                OrgDb         = org.Mm.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.05,
                qvalueCutoff  = 0.1)

CSex4_GO_UP <- as_tibble(res)

Down-regulated genes

Code
res <- enrichGO(gene          = CSex4_DO_genes,
                OrgDb         = org.Mm.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.05,
                qvalueCutoff  = 0.1)

CSex4_GO_DO <- as_tibble(res)

CSex4 vs WT NPC all GO terms for up-regulated genes. To simplify the html widget below I filter the actual number of GO terms to be displayed, focusing on only biological processes GO terms that are very significant.

Code
CSex4_GO_UP |>
  subset(ONTOLOGY == "BP" & p.adjust < 0.00001) |>
  select(-geneID) |>
  mutate(across(.cols = c(ends_with('value'), 'p.adjust'), .fns = \(x) signif(x, digits = 2)  ) ) |>
  datatable(rownames = FALSE, filter = 'top', 
            options = list(pageLength = 5, autoWidth = TRUE) )

CSex4 vs WT NPC all GO terms for down-regulated genes. Filter like above

Code
CSex4_GO_DO |>
  subset(ONTOLOGY == "BP" & p.adjust < 0.00001) |>
  select(-geneID) |>
  mutate(across(.cols = c(ends_with('value'), 'p.adjust'), .fns = \(x) signif(x, digits = 2)  ) ) |>
  datatable(rownames = FALSE, filter = 'top', 
            options = list(pageLength = 5, autoWidth = TRUE) )

Select best top significantly enriched GO terms for conveying a message in the figure 7D

Code
top_go_id_up <- c('GO:0007565', 'GO:1903489', 'GO:0001890', 'GO:0030879', 'GO:0001935')

CSex4_GO_UP|>
  subset(ID %in% top_go_id_up) |>
  mutate(Description = str_wrap(Description, width = 40)) |>
  mutate(Description = fct_reorder(Description, rev(p.adjust)) )  |>
  ggplot() +
    geom_col(aes(x = -log10(p.adjust),  y = Description, fill = -log10(p.adjust)), 
             colour = 'black', linewidth = 0.2 ) +
    geom_text(aes(label = Description, x = 0.15, y = Description), hjust = 0, size = 1.75 ) +
    scale_fill_gradient(low = '#fad7d9', high = '#E63945') +
    scale_x_continuous(expand = expansion(mult = c(0, 0.05)), n.breaks = 6 ) +
    coord_cartesian(xlim = c(0, NA)) +
    labs(x = expression(-log[10] ~ "adj. P-value"),
         y = "GO terms") + GO_terms_barplot_th -> p_CSex4_GO_UP
p_CSex4_GO_UP

Save to pdf.

Code
ggsave(path = pdf_dir_fig, plot = p_CSex4_GO_UP, device = cairo_pdf, units = "cm",
       filename = paste0("Fig7D_GO_terms_UP_CSex4_vs_WT_bars.pdf"),
       width = 3.25, height = 3.05)

Plot GOs of down-regulated genes

Code
top_go_id_down <- c('GO:0007389', 'GO:0001655', 'GO:0050767', 'GO:0007409', 'GO:0030900')

CSex4_GO_DO |>
  subset(ID %in% top_go_id_down) |>
  mutate(Description = str_wrap(Description, width = 40)) |>
  mutate(Description = fct_reorder(Description, rev(p.adjust)) )  |>
  ggplot() +
    geom_col(aes(x = -log10(p.adjust),  y = Description, fill = -log10(p.adjust)), 
             colour = 'black', linewidth = 0.2 ) +
    geom_text(aes(label = Description, x = 0.2, y = Description), hjust = 0, size = 1.75 ) +
    scale_fill_gradient(low = '#C0CCDD', high = '#305890') +
    scale_x_continuous(expand = expansion(mult = c(0, 0.05)), n.breaks = 6 ) +
    coord_cartesian(xlim = c(0, NA)) +
    labs(x = expression(-log[10] ~ "adj. P-value"),
         y = "GO terms") + GO_terms_barplot_th -> p_CSex4_GO_DO
p_CSex4_GO_DO

Save to pdf.

Code
ggsave(path = pdf_dir_fig, plot = p_CSex4_GO_DO, device = cairo_pdf, units = "cm",
       filename = paste0("Fig7E_GO_terms_DOWN_CSex4_vs_WT_bars.pdf"),
       width = 3.25, height = 3.05)

3.3 CSex4 up-regulated genes expression levels

Check the RPKM of the CSex4 placental up-regulated genes in all cell lines.

Code
CSex4_GO_UP|>
  subset(ID %in% top_go_id_up) |>
  select(ID, geneID) |>
  group_by(ID) |>
    # turn character with / separator into list
    mutate(genes_list = list(strsplit(geneID, "/")[[1]]), 
           .before = ID) |>
    ungroup() |>
    unnest_wider(genes_list, names_sep = '_g') |>
    pivot_longer(cols = starts_with('genes_list_g'), 
                 values_to = 'Gene') |>
    select(-c(name, ID, geneID)) |>
    subset(!is.na(Gene)) |>
    unique() |> arrange(Gene) |> pull(Gene) -> up_CSex4_genes_GOs
message('Retrieved ', length(up_CSex4_genes_GOs), ' CSex4 up genes in top GO terms')
Retrieved 67 CSex4 up genes in top GO terms

Define samples’ genotype

Code
WT_samples <- c("WTp", "WT#1a", "WT#1b", "WT#2")
CSex4_samples <- c("CSex4#1", "CSex4#2")
dEx4_samples <- c("∆ex4#1", "∆ex4#2")
KO_samples <- c("KO#1", "KO#3a", "KO#3b", "KO#4")
KOrL_samples <- c("KO+L#1", "KO+L#2")
KOrS_samples <- c("KO+S#1", "KO+S#2")
KOrL3D_samples <- c("KO+L-3D#1", "KO+L-3D#2")
KOrS3D_samples <- c("KO+S-3D#1", "KO+S-3D#2")

Import RPKMs from the NPCs RNA-seq

Code
import_rpkms(base_path = round22_rpkm_dir, file_name = 'A_B2_B_B3_RPKMs_paired.txt',
             sample_order = c("WTp", "WT#1a", "CSex4#1", "CSex4#2", "∆ex4#1", "∆ex4#2",
                              "KO#3a", "KO#4", "WT#1b", "WT#2", "KO#1", "KO#3b",
                              "KO+L#1", "KO+L#2", "KO+S#1", "KO+S#2") ) |>
  subset(!Pretty_Sample %in% c('KO#1', 'KO#4') ) |>
  mutate(Genotype = case_when(Pretty_Sample %in% WT_samples ~ 'WT',
                                Pretty_Sample %in% CSex4_samples ~ 'CSex4',
                                Pretty_Sample %in% dEx4_samples ~ '∆ex4',
                                Pretty_Sample %in% KO_samples ~ 'KO',
                                Pretty_Sample %in% KOrL_samples ~ 'KO+L',
                                Pretty_Sample %in% KOrS_samples ~ 'KO+S') ) |>
    mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4', 
                                                  'KO', 'KO+L', 'KO+S') ) ) |>
  relocate(Genotype, .before = external_gene_name) |> 
  relocate(Pretty_Sample, .after = Genotype) -> df_rpkm

Display NPCs gene RPKMs.

Since the df_rpkm dataframe with all the genes contains 366394 rows, I filter out the lowly expressed one and pick the most abundant one to display here below.

Code
df_rpkm |> 
  group_by(external_gene_name) |> 
  summarise(mean_rpkm = mean(RPKM, na.rm = T)) |>
  subset(mean_rpkm >= 50) |>
  pull(external_gene_name) |> unique() -> top_NPCs_genes_to_display
message('Filtering to display ', length(top_NPCs_genes_to_display), ' genes')
Filtering to display 1457 genes

Display this info as an interactive searchable data table

Code
df_rpkm |>
  subset(external_gene_name %in% top_NPCs_genes_to_display) |>
  mutate(across(.cols = 'RPKM', .fns = \(x) round(x, digits = 3)  ) ) |>
  datatable(rownames = FALSE, filter = 'top', 
            options = list(pageLength = 5, autoWidth = TRUE) )

Filter RPKMs for the genes found in the significantly enriched GO terms in the CSex4 upregulated genes.

Code
df_rpkm |>
  subset(external_gene_name %in% up_CSex4_genes_GOs) -> df_fltrd_rpkm

Show the RPKMs in the genes found in the GO terms enriched in the up-regulated genes of CSex4.

Start typing in the external gene name column Prl and see how many Prolactin genes are up-regulated in NPCs.

Code
df_fltrd_rpkm |>
  mutate(across(.cols = 'RPKM', .fns = \(x) round(x, digits = 3)  ) ) |>
  datatable(rownames = FALSE, filter = 'top', 
            options = list(pageLength = 5, autoWidth = TRUE) )

Show up genes as boxplot

Code
df_fltrd_rpkm |> 
  mutate(log2_RPKM = log2(RPKM)) |>
  subset(!is.na(log2_RPKM)) |>
  subset(is.finite(log2_RPKM)) -> df_fltrd_rpkm_log2

num_go_genes_CSex4_up <- length(unique(df_fltrd_rpkm_log2$external_gene_name))
ggplot(df_fltrd_rpkm_log2) +
  aes(x = Genotype, y = log2_RPKM, fill = Genotype) +
  geom_boxplot(show.legend = F, outlier.shape = NA, width = 0.75, linewidth  = 0.2) +
  scale_fill_manual(values = genotype_palette) + 
  labs(y = expression("RPKM (" ~ log[2] ~ ")" ) ) +
  th_neuro -> p_CSex4_up_go_gene
p_CSex4_up_go_gene

Save to pdf.

Code
ggsave(filename = paste0("FigS7B_Expression_GO_genes_CSex4_up_n", num_go_genes_CSex4_up, ".pdf"),
       device = cairo_pdf,  path = pdf_dir_fig, units = 'cm', width = 4.1, height = 5)

Reshape these genes into a matrix.

Code
df_fltrd_rpkm |>
  pivot_wider(id_cols = external_gene_name, names_from = Pretty_Sample, values_from = RPKM) |>
  column_to_rownames('external_gene_name') |>
  as.matrix() -> mat_rpkm_reps

z-Score the matrix and remove non-finite genes that arise from the scaling due to division by zero (rowSds = 0).

Code
scld_mat_rpkm_reps <- (mat_rpkm_reps - rowMeans(mat_rpkm_reps)) / matrixStats::rowSds(mat_rpkm_reps)

scld_mat_rpkm_reps <- scld_mat_rpkm_reps[which(apply(scld_mat_rpkm_reps, 1, function(x) { all(is.finite(x)) } )), ]

message('Num of genes in rows: ', nrow(scld_mat_rpkm_reps), "\n",
        'Num of samples in columns: ', ncol(scld_mat_rpkm_reps))
Num of genes in rows: 67
Num of samples in columns: 14

Prepare a colour palette. Use floor and ceiling to deal with even/odd length palette lengths

Code
palette_high_low <- colorRampPalette(colors = c('#1D3557', "white",  '#E63945'))(20)
paletteLength <- length(palette_high_low)

breaks_mat_mean <- c(seq( min(scld_mat_rpkm_reps), 0, length.out = ceiling(paletteLength / 2) + 1),
                     seq( max(scld_mat_rpkm_reps) / paletteLength, max(scld_mat_rpkm_reps),
                          length.out = floor(paletteLength / 2)) )

Plot heatmap

Code
pheatmap(mat = t(scld_mat_rpkm_reps), color = palette_high_low, cellwidth = 8,
         cellheight = 8, border_color = 'grey16', fontsize_col = 8,
         breaks = breaks_mat_mean, fontsize = 5, cluster_rows = T,
         cluster_cols = T, show_colnames = T, treeheight_col = 0, 
         treeheight_row = 50)

Save to pdf.

Code
pdf.options(encoding = 'ISOLatin2.enc')
pdf(file = file.path(pdf_dir_fig, paste0("FigExtra_RPKMs_UP_CSex4_genes_zScore_heatmap_n", 
                                         nrow(scld_mat_rpkm_reps), ".pdf") ),
    width = 9.35, height = 2.5)
pheatmap(mat = t(scld_mat_rpkm_reps), color = palette_high_low, cellwidth = 8,
         cellheight = 8, border_color = 'grey16', fontsize_col = 8,
         breaks = breaks_mat_mean, fontsize = 5, cluster_rows = T,
         cluster_cols = T, show_colnames = T, treeheight_col = 0, 
         treeheight_row = 50)
dev.off()
pdf 
  3 

Now check all up-regulated genes in CSex4.

Code
import_res(CSx4_path_NPC) |>
  subset(direction == 'UP') |>
  pull(external_gene_name) |> unique() -> up_CSex4_genes

Reshape all up-genes into a matrix.

Code
df_rpkm |>
  subset(external_gene_name %in% up_CSex4_genes) -> df_fltrd_rpkm

df_fltrd_rpkm |>
  pivot_wider(id_cols = external_gene_name, names_from = Pretty_Sample, values_from = RPKM) |>
  column_to_rownames('external_gene_name') |>
  as.matrix() -> mat_rpkm_reps

scld_mat_rpkm_reps <- (mat_rpkm_reps - rowMeans(mat_rpkm_reps)) / matrixStats::rowSds(mat_rpkm_reps)

scld_mat_rpkm_reps <- scld_mat_rpkm_reps[which(apply(scld_mat_rpkm_reps, 1, function(x) { all(is.finite(x)) } )), ]

message('Num of genes in rows: ', nrow(scld_mat_rpkm_reps), "\n",
        'Num of samples in columns: ', ncol(scld_mat_rpkm_reps))
Num of genes in rows: 636
Num of samples in columns: 14
Code
breaks_mat_mean <- c(seq( min(scld_mat_rpkm_reps), 0, length.out = ceiling(paletteLength / 2) + 1),
                     seq( max(scld_mat_rpkm_reps) / paletteLength, max(scld_mat_rpkm_reps), 
                          length.out = floor(paletteLength / 2)) )

Plot heatmap of all up-regulate genes.

Code
pheatmap(mat = t(scld_mat_rpkm_reps), color = palette_high_low,
         breaks = breaks_mat_mean, fontsize = 5, cluster_rows = T, 
         border_color = NA, cluster_cols = T, show_colnames = F, 
         treeheight_col = 0, treeheight_row = 10)

Nice to see how the ∆ex4, WT, and KO+S are all very similar and close to each other.

Code
pdf.options(encoding = 'ISOLatin2.enc')
pdf(file = file.path(pdf_dir_fig, paste0("FigExtra_RPKMs_UP_CSex4_genes_zScore_heatmap_n", 
                                         nrow(scld_mat_rpkm_reps), ".pdf") ),
    width = 9.35, height = 2.5)
pheatmap(mat = t(scld_mat_rpkm_reps), color = palette_high_low,
         breaks = breaks_mat_mean, fontsize = 5, cluster_rows = T, 
         border_color = NA, cluster_cols = T, show_colnames = F, 
         treeheight_col = 0, treeheight_row = 10)
dev.off()
pdf 
  3 

3.4 PCA

Make a metadata dataframe to use for colouring the samples in the PCA

Code
data.frame(Pretty_Sample = c('WTp', 'WT#1a', 'CSex4#1', 'CSex4#2',
                             '∆ex4#1', '∆ex4#2', 'KO#3a',
                             'WT#1b', 'WT#2', 'KO#3b',
                             'KO+L#1', 'KO+L#2', 'KO+S#1', 'KO+S#2'),
           Genotype = c(rep('WT', 2), rep('CSex4', 2), rep('∆ex4', 2),
                        rep('KO', 1), rep('WT', 2), rep('KO', 1),
                        rep('KO+L', 2), rep('KO+S', 2) ) ) -> mtdt_npc

mtdt_npc$Pretty_Sample <- factor(mtdt_npc$Pretty_Sample, levels = mtdt_npc$Pretty_Sample )
mtdt_npc$Genotype <- factor(mtdt_npc$Genotype, levels = unique(mtdt_npc$Genotype) )

Make a PCA to check samples

Code
df_rpkm |>
  pivot_wider(id_cols = external_gene_name, names_from = Pretty_Sample, values_from = RPKM) |>
  column_to_rownames('external_gene_name') |>
  as.matrix() -> mat_all_rpkm_reps

# filter for row means >= 10
mat_fltrd_rpkm_reps <- (mat_all_rpkm_reps[(rowMeans(mat_all_rpkm_reps) >= 10), ])

PCA 1 vs 2

Code
showme_PCA2D(mat = mat_fltrd_rpkm_reps, mt = mtdt_npc, mcol = 'Pretty_Sample', 
             m_label = 'Pretty_Sample', scale. = T, 
             n_top_var = 1000, m_fill = 'Genotype') + scale_fill_manual(values = genotype_palette)

PCA 1 vs 3

Code
showme_PCA2D(mat = mat_fltrd_rpkm_reps, y = 3, mt = mtdt_npc, mcol = 'Pretty_Sample', 
             m_label = 'Pretty_Sample', scale. = T, 
             n_top_var = 1000, m_fill = 'Genotype') + scale_fill_manual(values = genotype_palette)

PCA 2 vs 3

Code
showme_PCA2D(mat = mat_fltrd_rpkm_reps, x = 2, mt = mtdt_npc, mcol = 'Pretty_Sample', 
             m_label = 'Pretty_Sample', scale. = T, 
             n_top_var = 1000, m_fill = 'Genotype') + scale_fill_manual(values = genotype_palette)

Show variance explained by each principal component.

Code
showme_PCA2D(mat = mat_fltrd_rpkm_reps, scale. = T, 
             n_top_var = 1000, show_variance = T, real_aspect_ratio = F,
             mt = mtdt_npc, mcol = 'Pretty_Sample', 
             m_label = 'Pretty_Sample',
             m_fill = 'Genotype') + scale_fill_manual(values = genotype_palette)

3.5 Neurons

Import data from the neuronal quantification.

Code
measurments_path <- file.path(tbl_dir_fig, 'Neuronal_Nuclei_Measurments.tab')

neurons_measurments <- read_delim(file = measurments_path, delim = '\t', 
                                  quote = '', col_names = T, show_col_types = FALSE) |>
  mutate(Area_Perim = Area/Perimeter)

neurons_measurments$Genotype <- factor(neurons_measurments$Genotype, 
                                       levels = c('WT', 'CSex4', '∆ex4', 'KO', 'KO+L', 'KO+S')) 

Calculate basic stats

Code
neurons_measurments |>
  wilcox_test(formula = Area ~ Genotype, ref.group = 'WT', paired = F, p.adjust.method = 'BH', 
              exact = T, alternative = 'two.sided') |>
  mutate(p = signif(p, 1)) |>
  mutate(p_signif = case_when(p < 2.2e-16 ~ paste0("list(~italic(p)", "<2.2%.%10^-16", ")" ),
                              between(p, left = 2.2e-16, right = 0.05) ~ paste0("list(~italic(p)==", p, ")" ),
                              p >= 0.05 ~ 'list(~italic(N.S.))') ) |>
  mutate(p_signif = gsub("e-", "%.%10^", p_signif)) |>
  select(group1, group2, n1, n2, p, p_signif)  -> p_anno_df

neurons_measurments |>
    cohens_d(formula = Area ~ Genotype, var.equal = T, ref.group = "WT", paired = F) |>
    select(group1, group2, effsize, magnitude) |>
    mutate(effsize =  paste0("list(~d==", abs(signif(effsize, 2)) ,")") ) |>
    left_join(x = p_anno_df, by = c("group1", "group2") ) |>
    mutate(signig_label = paste0( gsub(")$", "", p_signif), gsub("^list\\(", "", effsize) ) ) |>
    relocate(signig_label, .after = group2) -> p_anno_df

Plot area of the neuronal nuclei as boxplot.

Code
ggplot(neurons_measurments) +
  aes(x = Genotype, y = log2(Area), fill = Genotype ) + #  group = interaction(Sample, ROI)
  geom_boxplot(show.legend = F, linewidth  = 0.15, outlier.shape = NA, width = 0.85) +
  # geom_violin(draw_quantiles = 0.5, show.legend = F, linewidth  = 0.2) +
  # geom_sina(shape = 21, size = 0.5, stroke = 0.2) +
  geom_signif(comparisons = list( c("WT", "CSex4"), c("WT", "∆ex4"), 
                                  c("WT", "KO"), c("WT", "KO+L"), c("WT", "KO+S")), 
              y_position = seq(8.5, 12, by = 0.75),
            parse = T, annotations = p_anno_df$signig_label,
            textsize = 2, family = "Arial", lwd = 0.2, vjust = 0.2,
            tip_length = 0.02)  +
  scale_x_discrete(expand = expansion(mult = c(0.13, 0.01), add = 0)) +
  scale_y_continuous( n.breaks = 10, limits = c(3, 12.75), expand = expansion(mult = 0, add = 0)) +
  scale_fill_manual(values = genotype_palette)  +
  labs(y = expression("Nuclei area (" ~ log[2] ~ ")" ) ) +
  th_neuro -> p_area
p_area

Save area plot to pdf.

Code
ggsave(filename = 'Fig7G_Neurons_Nuclei_Area_Boxplot.pdf', 
       plot = p_area, device = cairo_pdf, units = 'cm',
       path = pdf_dir_fig, width = 4.3, height = 4.75)

Plot other parameters and split the genotypes in each ROI (i.e. the cover-slide that was imaged or different field of view).

Nuclear Perimeter (log2)

Code
ggplot(neurons_measurments) +
  aes(x = Genotype, y = log2(Perimeter), fill = Genotype, group = interaction(Sample, ROI) ) +
  geom_boxplot(show.legend = F, linewidth  = 0.15, outlier.shape = NA, width = 0.75) +
  scale_x_discrete(expand = expansion(mult = c(0.13, 0.01), add = 0)) +
  # scale_y_continuous( n.breaks = 10, expand = expansion(mult = 0, add = 0)) +
  scale_fill_manual(values = genotype_palette)  +
  labs(y = expression("Nuclear perimeter (" ~ log[2] ~ ")" ) ) +
  th_neuro -> p_perimeter
p_perimeter

Nuclear aspect ratio

Code
ggplot(neurons_measurments) +
  aes(x = Genotype, y = AspectRatio, fill = Genotype, group = interaction(Sample, ROI) ) +
  geom_boxplot(show.legend = F, linewidth  = 0.15, outlier.shape = 21, 
               width = 0.75, outlier.size = 0.75, outlier.stroke = 0.2) +
  scale_x_discrete(expand = expansion(mult = c(0.13, 0.01), add = 0)) +
  scale_y_continuous( n.breaks = 10, expand = expansion(mult = c(0, 0.05), add = 0)) +
  scale_fill_manual(values = genotype_palette)  +
  coord_cartesian(ylim = c(0, NA)) +
  labs(y = expression("Nuclear aspect ratio" ) ) +
  th_neuro -> p_AR
p_AR

Save to pdf aspect ratio of neurons nuclei

Code
ggsave(filename = 'FigExtra_Neurons_Nuclei_AspectRatio_Boxplot.pdf', plot = p_AR, device = cairo_pdf, 
       path = pdf_dir_fig, width = 1.75, height = 1.75)

Area over perimeter

Code
ggplot(neurons_measurments) +
  aes(x = Genotype, y = Area_Perim, fill = Genotype, group = interaction(Sample, ROI) ) +
  geom_boxplot(show.legend = F, linewidth  = 0.15, outlier.shape = 21, 
               width = 0.75, outlier.size = 0.75, outlier.stroke = 0.2) +
  scale_x_discrete(expand = expansion(mult = c(0.13, 0.01), add = 0)) +
  scale_y_continuous( n.breaks = 10, expand = expansion(mult = c(0, 0.05), add = 0)) +
  scale_fill_manual(values = genotype_palette)  +
  coord_cartesian(ylim = c(0, NA)) +
  labs(y = expression("Nuclear area / perimeter" ) ) +
  th_neuro -> p_area_perimeter
p_area_perimeter

Save to pdf area divided by perimeter of neurons nuclei

Code
ggsave(filename = 'FigExtra_Neurons_Nuclei_AreaPerimeter_Boxplot.pdf', plot = p_area_perimeter, 
       device = cairo_pdf, 
       path = pdf_dir_fig, width = 1.75, height = 1.75)

4 Supplementary Figure Panels

4.1 MA plots NPCs

Manually invert KO vs WT because it’s done as WT vs KO right now.

Code
# plot_MA_fig(file_path = KO_path_NPC, sample_name_contrast = 'KO', Panel_Num = 'S6X', Y_lim = c(-10, 10))

MA plot NPCs KO+L

Code
plot_MA_fig(file_path = KOrL_path_NPC, sample_name_contrast = 'KO+L', Panel_Num = 'Extra', Y_lim = c(-10, 10))

MA plot NPCs KO+S

Code
plot_MA_fig(file_path = KOrS_path_NPC, sample_name_contrast = 'KO+S', Panel_Num = 'Extra', Y_lim = c(-10, 10))

4.2 DEG counts NPCs

Calculate number of DEGs in NPCs

Code
rbind( summarise_DEGs(file_path = dEx4_path_NPC, sample_name_contrast = '∆ex4'),
       summarise_DEGs(file_path = CSx4_path_NPC, sample_name_contrast = 'CSex4'),
       summarise_DEGs(file_path = KO_path_NPC, sample_name_contrast = 'KO'),
       summarise_DEGs(file_path = KOrL_path_NPC, sample_name_contrast = 'KO+L'),
       summarise_DEGs(file_path = KOrS_path_NPC, sample_name_contrast = 'KO+S') ) |> 
  subset(direction != 'NONE') |>
  mutate(sample = str_remove(pattern = '_vs_WT', contrast)) |>
  mutate(sample = factor(sample, levels = c('CSex4', '∆ex4', 'KO', 'KO+L', 'KO+S') ) ) |>
  mutate(Num_genes = ifelse(direction == 'DOWN', yes = -Num_genes, no = Num_genes) ) -> num_genes_df_NPCs

Plot number of differentially expressed genes (DEGs) per condition / WT (figure S6E)

Code
txt_size <- 1.55
ggplot(num_genes_df_NPCs) +
  aes(x = sample, y = Num_genes, fill = direction) +
  geom_col(colour = 'black', width = 0.75, linewidth = 0.15) +
  geom_text(data = subset(num_genes_df_NPCs, direction == 'UP'), 
            aes(label = Num_genes), vjust = -1, family = "Arial", size = txt_size) +
  geom_text(data = subset(num_genes_df_NPCs, direction == 'DOWN'), 
            aes(label = abs(Num_genes)), vjust = 1.5, family = "Arial", size = txt_size) +
  scale_y_continuous(n.breaks = 10, expand = expansion(mult = c(0.03, 0), add = 0) ) +
  scale_fill_manual(values = c('UP' = '#E63945', 'DOWN' = '#1D3557'),
                    labels = c('Down', 'Up'), name = "Diff. expr. genes") +
  coord_cartesian(ylim = c(-3750, 3750), clip = 'off' ) +
  labs(y = "Number DEGs") + DEG_counts_barplot_th -> p_Num_DEGs_NPCs
p_Num_DEGs_NPCs

Save to pdf.

Code
ggsave(path = pdf_dir_fig, plot = p_Num_DEGs_NPCs, device = cairo_pdf, units = "cm",
       filename = "FigS7A_BarPlot_Quantification_DEGs.pdf",
       width = 5, height = 4)

4.3 Repression index NPCs

Calculate rescue efficiency ratio (a.k.a. repression index for Suz12) in NPCs similar to how it was done for NPCs.

One caveat of this analysis is that the rescue index here is calculated on genes that are Suz12 targets in mESCs, since we don’t have SUZ12 ChIP-seq data that a compromise we had to do.

Code
data.frame(Pretty_Sample = c('WTp', 'WT#1a', 'CSex4#1a', 'CSex4#1b',
                             '∆ex4#1', '∆ex4#2', 'KO#3a', 'KO#4',
                             'WT#1b', 'WT#2', 'KO#1', 'KO#3b',
                             'KO+L#1', 'KO+L#2', 'KO+S#1', 'KO+S#2'),
           Genotype = c(rep('WT', 2), rep('CSex4', 2), rep('∆ex4', 2),
                        rep('KO', 2), rep('WT', 2), rep('KO', 2),
                        rep('KO+L', 2), rep('KO+S', 2) ) ) -> mtdt_npc

mtdt_npc$Pretty_Sample <- factor(mtdt_npc$Pretty_Sample, levels = mtdt_npc$Pretty_Sample )
mtdt_npc$Genotype <- factor(mtdt_npc$Genotype, levels = unique(mtdt_npc$Genotype) )

Import RPKMs of NPCs

Code
rpkm_npc <- import_rpkms(base_path = round22_rpkm_dir,
                        file_name = 'A_B2_B_B3_RPKMs_paired.txt',
                        sample_order = c('WTp', 'WT#1a', 'CSex4#1a', 'CSex4#1b',
                                         '∆ex4#1', '∆ex4#2', 'KO#3a', 'KO#4',
                                         'WT#1b', 'WT#2', 'KO#1', 'KO#3b',
                                         'KO+L#1', 'KO+L#2', 'KO+S#1', 'KO+S#2')) |>
  left_join(y = mtdt_npc, by = join_by(Pretty_Sample) ) |>
  mutate(Pretty_Sample = factor(Pretty_Sample, levels = mtdt_npc$Pretty_Sample)) |>
  relocate(Genotype, .before = Pretty_Sample) |>
  print(3)
# A
#   tibble:
#   418,736
#   ×
#   4
# ℹ 418,726 more rows
# ℹ 4 more variables: external_gene_name <chr>, Genotype <fct>, Pretty_Sample <fct>, RPKM <dbl>

Check Suz12 expression levels in NPCs

Code
subset(rpkm_npc, external_gene_name == 'Suz12') |>
  ggplot() +
  aes(x = Genotype, y = RPKM, fill = Genotype) +
  geom_boxplot(show.legend = F, linewidth = 0.2, outlier.shape = NA) +
  geom_point(show.legend = F, size = 0.75) +
  scale_fill_manual(values = genotype_palette) +
  scale_y_continuous(limits = c(0, NA), n.breaks = 6 ) +
  theme_classic() +
  theme(axis.text = element_text(colour = 'black'),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour = 'gray84', linewidth = 0.2))

Filter only for samples used in the analysis

Code
discard_samples <- c('WTp', 'WT#1a', 'CSex4#1a', 'CSex4#1b',
                     '∆ex4#1', '∆ex4#2','KO#4', 'KO#1')
rpkm_npc <- subset(rpkm_npc, !Pretty_Sample %in% discard_samples) 

Apply rescue index formula to genes that have at least a mean expression higher than zero across all samples and that are up-regulated in SUZ12-KO

Get genes up in KO. Note here the contrast was inverted where KO is the reference, so the UP regulated genes in the KO are the “down” regulated (meaning more expressed in KO and less expressed in WT)

Code
KO_path_NPC <- file.path(round22_deseq2_dir, 'A-3_KO_B-3_WT_stats_paired.txt')

KO_NPC <- read_delim(file = KO_path_NPC, delim = '\t', show_col_types = F)
signif_thrshld <- 0.05
# annotate res
KO_NPC |>
    mutate(signif = case_when(padj <= signif_thrshld ~ 'YES',
                              padj > signif_thrshld ~ 'NO') ) |>
    mutate(direction = case_when(signif == 'YES' & log2FoldChange >= 0 ~ 'UP',
                                 signif == 'YES' & log2FoldChange < 0 ~ 'DOWN',
                                 signif == 'NO' | is.na(padj) ~ 'NONE') ) |>
    # order point for plotting
    mutate(direction = factor(direction, levels = c('NONE', 'UP', 'DOWN'))) |>
    arrange(direction) |> # baseMean
    # filter out missing values for the plot
    subset(!is.na(log2FoldChange)) |>
    subset(!is.na(baseMean)) -> KO_NPC

# Get only the DEGs in KO 
up_in_ko <- subset(KO_NPC, direction == "DOWN")$external_gene_name
dn_in_ko <- subset(KO_NPC, direction == "UP")$external_gene_name
length(up_in_ko)
[1] 2982
Code
length(dn_in_ko)
[1] 2678
Code
stopifnot(!any(up_in_ko %in% dn_in_ko))
# check how may are Suz12 targeted genes in ESCs
Suz12_CGI_targets <- read_delim(Suz12_CGI_targets_path, delim = '\t', 
                                show_col_types = FALSE, col_names = 'external_gene_name')$external_gene_name

message('Genes targeted by SUZ12 in mESCs: ', length(Suz12_CGI_targets) )
Genes targeted by SUZ12 in mESCs: 2665
Code
Suz12_CGI_targets_inESCs_KO_UP <- Suz12_CGI_targets[which(Suz12_CGI_targets %in% c(up_in_ko))]
message('Genes targeted by SUZ12 in mESCs UP-regulated in NPCs: ', length(Suz12_CGI_targets_inESCs_KO_UP) )
Genes targeted by SUZ12 in mESCs UP-regulated in NPCs: 598
Code
Suz12_CGI_targets_inESCs_KO_DO <- Suz12_CGI_targets[which(Suz12_CGI_targets %in% c(dn_in_ko))]
message('Genes targeted by SUZ12 in mESCs DOWN-regulated in NPCs: ', length(Suz12_CGI_targets_inESCs_KO_DO) )
Genes targeted by SUZ12 in mESCs DOWN-regulated in NPCs: 598
Code
# sanity check
stopifnot(!any(Suz12_CGI_targets_inESCs_KO_UP %in% Suz12_CGI_targets_inESCs_KO_DO))

Filter genes that are not expressed.

Code
rpkm_npc |>
 subset(external_gene_name %in% up_in_ko) |>
  group_by(external_gene_name) |>
  mutate(gene_expression = mean(RPKM, na.rm = TRUE)) |>
  subset(gene_expression > 0) |>
  select(-gene_expression) |>
  ungroup() -> rpkm_npc_fltrd

Filter out genes with zero RPKMs has those give NaN RER.

Code
rpkm_npc_fltrd |>
  group_by(external_gene_name, Genotype) |>
  mutate(Mean_rpkms = mean(RPKM, na.rm = TRUE)) |>
  ungroup() |>
  group_by(external_gene_name) |>
  arrange(desc(Mean_rpkms)) |>
  # scale RPKMs with an extra pseudocount
  mutate(Mean_rpkms = log2(Mean_rpkms + 0.1) ) |>
  select(-c(Pretty_Sample, RPKM)) |>
  unique() |>
  mutate(Mean_rpkms_WT = Mean_rpkms[Genotype == "WT"] ) |>
  # filter for genes that are at least expressed in WT
  subset(Mean_rpkms_WT > 0 ) |>
  # how much a rescue is similar to a WT
  mutate(Difference_to_WT = Mean_rpkms - Mean_rpkms[Genotype == "WT"]) |>
  # How much a gene is changed in KO
  mutate(Difference_KO_WT = Mean_rpkms[Genotype == "KO"] - Mean_rpkms[Genotype == "WT"] ) |>
  mutate(RATIO = Difference_to_WT/Difference_KO_WT) |>
  # Rescue Efficiency Ratio
  mutate(RER = (10^-RATIO) ) |>
  ungroup() |>
  print(n = 5 ) -> rer_rpkm
# A tibble: 7,304 × 8
  external_gene_name Genotype Mean_rpkms Mean_rpkms_WT Difference_to_WT
  <chr>              <fct>         <dbl>         <dbl>            <dbl>
1 Ftl1               KO+L           11.5          10.6            0.947
2 Ftl1               KO             11.3          10.6            0.715
3 Actg1              KO             11.0          10.3            0.691
4 Ftl1               KO+S           11.0          10.6            0.412
5 Actg1              KO+L           10.9          10.3            0.567
# ℹ 7,299 more rows
# ℹ 3 more variables: Difference_KO_WT <dbl>, RATIO <dbl>, RER <dbl>
Code
# Show some examples
rer_rpkm |>
  pivot_longer(cols = c(Mean_rpkms, Difference_to_WT, Difference_KO_WT, RATIO, RER),
               names_to = "Calculus", values_to = "Value") |>
  subset(external_gene_name %in% c("Myl9", 'Krt18', 'Bmp4') ) |>
  ggplot() +
  aes(x = Genotype, y = Value, fill = Genotype)+
  facet_grid(Calculus ~ external_gene_name, scales = "free_y") +
  geom_col(show.legend = F) +
  scale_fill_manual(values = genotype_palette )  +
  theme_bw()

Reshape RER to wide for plot.

Code
rer_rpkm |>
  dplyr::select(external_gene_name, Genotype, RER) |>
  pivot_wider(id_cols = c(external_gene_name), 
              names_from = Genotype, values_from = RER) |>
  print(3) -> rer_wide 
# A
#   tibble:
#   1,826
#   ×
#   5
# ℹ 1,816 more rows
# ℹ 5 more variables: external_gene_name <chr>, `KO+L` <dbl>, KO <dbl>, `KO+S` <dbl>, WT <dbl>
Code
# nrow(rer_wide[which(!is.finite(rer_wide$`KO+S`)), ])
# nrow(rer_wide[which(!is.finite(rer_wide$`KO+L`)), ])

Plot rescue index scatter plot for figure S7D.

Code
max_rer <- 5
rer_wide |>
ggplot() +
  aes(x = `KO+L`, y = `KO+S`) +
  geom_hline(yintercept = 1, linewidth = 0.1, colour = "black" ) +
  geom_vline(xintercept = 1, linewidth = 0.1, colour = "black" ) +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed', linewidth = 0.1 ) +
  geom_point(fill = 'gray50', size = 0.75, shape = 21, stroke = 0.2, alpha = 1) +
  geom_density2d(colour = 'black', linewidth = 0.1, contour_var = "count", alpha = 0.5) +
  coord_cartesian(clip = 'off') +
  scale_x_continuous(breaks = c(seq(0, max_rer, 1)), oob = scales::oob_squish,
                     expand = expansion(add = 0.1, mult = c(0, 0.01) ),
                     limits = c(0, max_rer))  +
  scale_y_continuous(breaks = c(seq(0, max_rer, 1)), oob = scales::oob_squish,
                     expand = expansion(add = 0.1, mult = c(0, 0.01) ),
                     limits = c(0, max_rer) ) +
  theme_classic(base_size = 5, base_family = "Arial") +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        axis.line = element_line(linewidth = 0.2),
        axis.ticks = element_line(linewidth = 0.2),
        axis.ticks.length = unit(1, "mm"),
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_line(linewidth = 0.2),
        strip.background = element_blank(),
        legend.position = 'none',
        legend.key.size = unit(1, "mm"),
        plot.background = element_blank(),
        plot.title = element_text(size = 5.5)
  ) -> p_scatter

Make density plot KO+S

Code
ggplot(rer_wide) +
  aes(x = `KO+S`, y = -after_stat(scaled)) +
  coord_flip() +
  geom_density(fill = '#F07E19', linewidth = 0.1) +
  scale_x_continuous(oob = scales::oob_squish,
                     breaks = c(seq(0, max_rer, 1)),
                     expand = expansion(add = 0.1, mult = c(0, 0.01)) ,
                     limits = c(0, max_rer) ) +
  geom_vline(xintercept = 0, linewidth = 0.1, colour = "black" ) +
  geom_vline(xintercept = 1, linewidth = 0.1, colour = "black" ) +
  labs(x = "Suz12-S Repression Index") +
  theme_classic(base_size = 5, base_family = "Arial") +
  theme(axis.text = element_text(colour = 'black'),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.line = element_line(linewidth = 0.2),
        axis.ticks = element_line(linewidth = 0.2),
        axis.ticks.length = unit(1, "mm"),
        axis.ticks.x =  element_blank(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        strip.background = element_blank(),
        legend.position = 'none',
        legend.key.size = unit(1, "mm"),
        plot.background = element_blank(),
        plot.title = element_text(size = 5.5) ) -> p_density_S

Make density plot KO+L

Code
ggplot(rer_wide) +
  aes(x = `KO+L`, y = -after_stat(scaled)) +
  geom_density(fill = '#7B67AB', linewidth = 0.1) +
  geom_vline(xintercept = 1, linewidth = 0.1, colour = "black" ) +
  geom_vline(xintercept = 0, linewidth = 0.1, colour = "black" ) +
  scale_x_continuous(breaks = c(seq(0, max_rer, 1)),
                     oob = scales::oob_squish,
                     expand = expansion(add = 0.1, mult = c(0, 0.01) ),
                     limits = c(0, max_rer)) +
  labs(x = "Suz12-L Repression Index") +
  theme_classic(base_size = 5, base_family = "Arial") +
  theme(axis.text = element_text(colour = 'black'),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.line = element_line(linewidth = 0.2),
        axis.ticks = element_line(linewidth = 0.2),
        axis.ticks.length = unit(1, "mm"),
        axis.ticks.y =  element_blank(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        strip.background = element_blank(),
        legend.position = 'none',
        legend.key.size = unit(1, "mm"),
        plot.background = element_blank(),
        plot.title = element_text(size = 5.5) ) -> p_density_L

Test for significance

Code
# significant test
# ks.test(x = rer_wide$`KO+L`, y = rer_wide$`KO+S`, paired = T, exact = T)
pval_npc_rer <- wilcox.test(x = rer_wide$`KO+L`, y = rer_wide$`KO+S`, paired = T, exact = F)$p.val # if using exact = T I get NaN 
signif(pval_npc_rer, 3)
[1] 1.12e-05

Assemple figure S7D

Code
p_rer_NPC <- p_density_S + p_scatter +  plot_spacer() + p_density_L  +
  plot_layout(widths = c(1, 4), heights = c(4,1) )
p_rer_NPC

Save to pdf.

Code
num_genes <- length(unique(rer_wide$external_gene_name))

ggsave(filename = paste0("FigS7D_Repression_Index_NPCs_n", num_genes, ".pdf"), plot = p_rer_NPC,
       device = cairo_pdf,  path = pdf_dir_fig, units = 'cm', width = 4.5, height = 4.5)

4.4 Upset Rescues

Code
import_res(file_path = KO_path_NPC, invert_l2FC = T) |>
  subset(signif) |>
  pull(external_gene_name) -> KO_DEGs

import_res(file_path = KOrL_path_NPC) |>
  subset(signif) |>
  pull(external_gene_name) -> KOrL_DEGs

import_res(file_path = KOrS_path_NPC) |>
  subset(signif) |>
  pull(external_gene_name) -> KOrS_DEGs

Save an upset plot between all the 3 sets of DEGs

Code
pdf(file = file.path(pdf_dir_fig, paste0("FigExtra_Upset_DEG_Rescues.pdf") ),
    width = 3, height = 3)
UpSetR::upset(fromList(list(KO = KO_DEGs, `KO+L` = KOrL_DEGs, `KO+S` = KOrS_DEGs)),
              order.by = 'freq')
dev.off()
quartz_off_screen 
                2 

4.5 GO terms SUZ12 KO and rescues

KO up and down genes are intentionally inverted as the sample contrast order is inverted.

Code
up_genes_path <- file.path(round22_enrich_dir,
                           "A-3_KO_B-3_WT_DOWNgenelist_paired.txt")
do_genes_path <- file.path(round22_enrich_dir, 
                           "A-3_KO_B-3_WT_UPgenelist_paired.txt")
sample_name_contrast <- 'KO'

GOs_KO <- run_enrichGO_UP_DOWN(up_genes_path = up_genes_path, 
                               do_genes_path =  do_genes_path, 
                               sample_name_contrast = sample_name_contrast)
Number of KO up-regulated genes entering the GO term analysis: 3462
Number of KO down-regulated genes entering the GO term analysis: 3150

GO terms analysis for KO+L

Code
up_genes_path <- file.path(round22_enrich_dir,
                           "A-3_WT_B-3_KO-L_UPgenelist_paired.txt")
do_genes_path <- file.path(round22_enrich_dir,
                           "A-3_WT_B-3_KO-L_DOWNgenelist_paired.txt")
sample_name_contrast <- 'KO+L'

GOs_KOrL <- run_enrichGO_UP_DOWN(up_genes_path = up_genes_path, 
                                 do_genes_path = do_genes_path, 
                                 sample_name_contrast)
Number of KO+L up-regulated genes entering the GO term analysis: 4049
Number of KO+L down-regulated genes entering the GO term analysis: 4045

GO terms analysis for KO+S

Code
up_genes_path <- file.path(round22_enrich_dir,
                           "A-3_WT_B-3_KO-S_UPgenelist_paired.txt")
do_genes_path <- file.path(round22_enrich_dir,
                           "A-3_WT_B-3_KO-S_DOWNgenelist_paired.txt")
sample_name_contrast <- 'KO+S'

GOs_KOrS <- run_enrichGO_UP_DOWN(up_genes_path = up_genes_path, 
                                 do_genes_path, sample_name_contrast)
Number of KO+S up-regulated genes entering the GO term analysis: 1446
Number of KO+S down-regulated genes entering the GO term analysis: 1425

Merge results into one dataframe

Code
KOs_GOs <- rbind(GOs_KO, GOs_KOrL, GOs_KOrS) 

Now check the common GO terms:

Code
KOs_GOs |> 
  group_by(Direction, ID, Description) |>
  summarise(Num_GO_found = n(), .groups = 'keep' ) |>
  subset(Num_GO_found >= 3) -> common_GOs

# View(subset(KOs_GOs, ID %in% common_GOs$ID))

Show the most significant of the shared common GO terms and to limit the html widget overload filter only for biological processes ontology. Also remove the geneID column for simplicity.

Code
KOs_GOs |>
subset(ID %in% common_GOs$ID & ONTOLOGY == "BP" & p.adjust < 0.0000001) |>
  select(-geneID) |>
  mutate(across(.cols = c(ends_with('value'), 'p.adjust'), .fns = \(x) signif(x, digits = 2)  ) ) |>
  datatable(rownames = FALSE, filter = 'top', 
            options = list(pageLength = 5, autoWidth = TRUE) )

Select most interesting and significant GO terms between all 3 conditions.

Code
UP_GOs <- c('GO:0060537',  'GO:0010631', 'GO:0060485', 'GO:0016055')

DOWN_GOs <- c('GO:0030900', 'GO:0007409', 'GO:0098978', 'GO:0050808')

GOIs <- c(UP_GOs, DOWN_GOs)

Make a dataframe for plotting the shared GO terms from the up-regulated genes

Code
KOs_GOs |>
  subset(ID %in% UP_GOs) |>
  subset(Direction == 'UP') -> df_UP

Make figure S7E

Code
plot_heatmap_upset(df = df_UP, 
                   htmp_min_col = '#fad7d9', 
                   htmp_max_col = '#E63945',
                   lgnd_mm = c(3.5, 1), 
                   patch_rel_heights = c(5, 0.55),
                   bar_num_nudge = 7, 
                   k_text = 30)  -> p_up
p_up

Save to pdf.

Code
ggsave(filename = 'FigS7E_heatmap_upset_GO_UP.pdf', 
       plot = p_up, device = cairo_pdf, path = pdf_dir_fig,
       units = 'cm', width = 7, height = 5 )

Make a dataframe for plotting the shared GO terms from the up-regulated genes

Code
KOs_GOs |>
  subset(ID %in% DOWN_GOs) |>
  subset(Direction == 'DOWN' ) -> df_DO

Make figure S7F.

Code
plot_heatmap_upset(df = df_DO, 
                   htmp_min_col = '#C0CCDD', 
                   htmp_max_col = '#305890',
                   lgnd_mm = c(3, 1), 
                   patch_rel_heights = c(5, 0.55),
                   bar_num_nudge = 10
                   ) -> p_down
p_down

Save to pdf.

Code
ggsave(filename = 'FigS7F_heatmap_upset_GO_down.pdf', 
       plot = p_down, device = cairo_pdf, path = pdf_dir_fig,
       units = 'cm', width = 7, height = 5 )

End of the analysis.

5 Session Info

Code
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.1.2 (2021-11-01)
 os       macOS Catalina 10.15.7
 system   x86_64, darwin17.0
 ui       X11
 language (EN)
 collate  C
 ctype    UTF-8
 tz       Europe/Madrid
 date     2024-02-09
 pandoc   2.10.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 ! package              * version   date (UTC) lib source
   abind                  1.4-5     2016-07-21 [1] CRAN (R 4.1.0)
   ade4                   1.7-22    2023-02-06 [1] CRAN (R 4.1.2)
   annotate               1.72.0    2021-10-26 [1] Bioconductor
   AnnotationDbi        * 1.56.2    2021-11-09 [1] Bioconductor
   ape                    5.7-1     2023-03-13 [1] CRAN (R 4.1.2)
   aplot                  0.2.0     2023-08-09 [1] CRAN (R 4.1.2)
   backports              1.4.1     2021-12-13 [1] CRAN (R 4.1.0)
   beeswarm               0.4.0     2021-06-01 [1] CRAN (R 4.1.0)
   Biobase              * 2.54.0    2021-10-26 [1] Bioconductor
   BiocFileCache          2.2.1     2022-01-23 [1] Bioconductor
   BiocGenerics         * 0.40.0    2021-10-26 [1] Bioconductor
   BiocParallel           1.28.3    2021-12-09 [1] Bioconductor
   biomaRt                2.50.3    2022-02-06 [1] Bioconductor
   Biostrings             2.62.0    2021-10-26 [1] Bioconductor
   bit                    4.0.5     2022-11-15 [1] CRAN (R 4.1.2)
   bit64                  4.0.5     2020-08-30 [1] CRAN (R 4.1.0)
   bitops                 1.0-7     2021-04-24 [1] CRAN (R 4.1.0)
   blob                   1.2.4     2023-03-17 [1] CRAN (R 4.1.2)
   broom                  1.0.5     2023-06-09 [1] CRAN (R 4.1.2)
   bslib                  0.5.1     2023-08-11 [1] CRAN (R 4.1.2)
   cachem                 1.0.8     2023-05-01 [1] CRAN (R 4.1.2)
   Cairo                  1.6-1     2023-08-18 [1] CRAN (R 4.1.2)
   callr                  3.7.3     2022-11-02 [1] CRAN (R 4.1.2)
   car                    3.1-2     2023-03-30 [1] CRAN (R 4.1.2)
   carData                3.0-5     2022-01-06 [1] CRAN (R 4.1.2)
   cli                    3.6.1     2023-03-23 [1] CRAN (R 4.1.2)
   clusterProfiler      * 4.2.2     2022-01-13 [1] Bioconductor
   colorspace             2.1-0     2023-01-23 [1] CRAN (R 4.1.2)
   crayon                 1.5.2     2022-09-29 [1] CRAN (R 4.1.2)
   crosstalk              1.2.0     2021-11-04 [1] CRAN (R 4.1.0)
   csaw                   1.28.0    2021-10-26 [1] Bioconductor
   curl                   5.2.0     2023-12-08 [1] CRAN (R 4.1.2)
   data.table             1.14.8    2023-02-17 [1] CRAN (R 4.1.2)
   DBI                    1.1.3     2022-06-18 [1] CRAN (R 4.1.2)
   dbplyr                 2.3.3     2023-07-07 [1] CRAN (R 4.1.2)
   DelayedArray           0.20.0    2021-10-26 [1] Bioconductor
   desc                   1.4.2     2022-09-08 [1] CRAN (R 4.1.2)
   DESeq2                 1.34.0    2021-10-26 [1] Bioconductor
   devtools               2.4.5     2022-10-11 [1] CRAN (R 4.1.2)
   digest                 0.6.33    2023-07-07 [1] CRAN (R 4.1.2)
   DO.db                  2.9       2022-04-24 [1] Bioconductor
   DOSE                   3.20.1    2021-11-18 [1] Bioconductor
   downloader             0.4       2015-07-09 [1] CRAN (R 4.1.0)
   dplyr                * 1.1.2     2023-04-20 [1] CRAN (R 4.1.2)
   DT                   * 0.29      2023-08-29 [1] CRAN (R 4.1.2)
   edgeR                  3.36.0    2021-10-26 [1] Bioconductor
   ellipsis               0.3.2     2021-04-29 [1] CRAN (R 4.1.0)
   enrichplot             1.14.2    2022-02-24 [1] Bioconductor
   evaluate               0.21      2023-05-05 [1] CRAN (R 4.1.2)
   fansi                  1.0.4     2023-01-22 [1] CRAN (R 4.1.2)
   farver                 2.1.1     2022-07-06 [1] CRAN (R 4.1.2)
   fastmap                1.1.1     2023-02-24 [1] CRAN (R 4.1.2)
   fastmatch              1.1-4     2023-08-18 [1] CRAN (R 4.1.2)
   fgsea                  1.20.0    2021-10-26 [1] Bioconductor
   filelock               1.0.2     2018-10-05 [1] CRAN (R 4.1.0)
   forcats              * 1.0.0     2023-01-29 [1] CRAN (R 4.1.2)
   foreign                0.8-84    2022-12-06 [1] CRAN (R 4.1.2)
   fs                     1.6.3     2023-07-20 [1] CRAN (R 4.1.2)
   genefilter             1.76.0    2021-10-26 [1] Bioconductor
   geneplotter            1.72.0    2021-10-26 [1] Bioconductor
   generics               0.1.3     2022-07-05 [1] CRAN (R 4.1.2)
   GenomeInfoDb           1.30.1    2022-01-30 [1] Bioconductor
   GenomeInfoDbData       1.2.7     2023-08-29 [1] Bioconductor
   GenomicRanges          1.46.1    2021-11-18 [1] Bioconductor
   ggalluvial             0.12.5    2023-02-22 [1] CRAN (R 4.1.2)
   ggbeeswarm             0.7.2     2023-04-29 [1] CRAN (R 4.1.2)
   ggfittext              0.10.0    2023-04-04 [1] CRAN (R 4.1.2)
   ggforce              * 0.4.1     2022-10-04 [1] CRAN (R 4.1.2)
   ggfun                  0.1.2     2023-08-09 [1] CRAN (R 4.1.2)
   ggplot2              * 3.4.3     2023-08-14 [1] CRAN (R 4.1.2)
   ggplotify              0.1.2     2023-08-09 [1] CRAN (R 4.1.2)
   ggraph                 2.1.0     2022-10-09 [1] CRAN (R 4.1.2)
   ggrastr              * 1.0.2     2023-06-01 [1] CRAN (R 4.1.2)
   ggrepel                0.9.3     2023-02-03 [1] CRAN (R 4.1.2)
   ggseqlogo              0.1       2017-07-25 [1] CRAN (R 4.1.0)
   ggsignif             * 0.6.4     2022-10-13 [1] CRAN (R 4.1.2)
   ggtree                 3.2.1     2021-11-16 [1] Bioconductor
   glue                   1.6.2     2022-02-24 [1] CRAN (R 4.1.2)
   GO.db                  3.14.0    2023-11-15 [1] Bioconductor
   GOSemSim               2.20.0    2021-10-26 [1] Bioconductor
   graphlayouts           1.0.0     2023-05-01 [1] CRAN (R 4.1.2)
   gridExtra              2.3       2017-09-09 [1] CRAN (R 4.1.0)
   gridGraphics           0.5-1     2020-12-13 [1] CRAN (R 4.1.0)
   gtable                 0.3.4     2023-08-21 [1] CRAN (R 4.1.2)
   hms                    1.1.3     2023-03-21 [1] CRAN (R 4.1.2)
   htmltools              0.5.6     2023-08-10 [1] CRAN (R 4.1.2)
   htmlwidgets            1.6.2     2023-03-17 [1] CRAN (R 4.1.2)
   httpuv                 1.6.11    2023-05-11 [1] CRAN (R 4.1.2)
   httr                   1.4.7     2023-08-15 [1] CRAN (R 4.1.2)
   igraph                 1.5.1     2023-08-10 [1] CRAN (R 4.1.2)
   IRanges              * 2.28.0    2021-10-26 [1] Bioconductor
   isoband                0.2.7     2022-12-20 [1] CRAN (R 4.1.2)
   jquerylib              0.1.4     2021-04-26 [1] CRAN (R 4.1.0)
   jsonlite               1.8.7     2023-06-29 [1] CRAN (R 4.1.2)
   KEGGREST               1.34.0    2021-10-26 [1] Bioconductor
   knitr                  1.43      2023-05-25 [1] CRAN (R 4.1.2)
   labeling               0.4.2     2020-10-20 [1] CRAN (R 4.1.0)
   later                  1.3.1     2023-05-02 [1] CRAN (R 4.1.2)
   lattice                0.21-8    2023-04-05 [1] CRAN (R 4.1.2)
   lazyeval               0.2.2     2019-03-15 [1] CRAN (R 4.1.0)
   lifecycle              1.0.3     2022-10-07 [1] CRAN (R 4.1.2)
   limma                  3.50.3    2022-04-07 [1] Bioconductor
   locfit                 1.5-9.8   2023-06-11 [1] CRAN (R 4.1.2)
   magrittr               2.0.3     2022-03-30 [1] CRAN (R 4.1.2)
   MASS                   7.3-60    2023-05-04 [1] CRAN (R 4.1.2)
   Matrix                 1.6-1     2023-08-14 [1] CRAN (R 4.1.2)
   MatrixGenerics         1.6.0     2021-10-26 [1] Bioconductor
   matrixStats            1.0.0     2023-06-02 [1] CRAN (R 4.1.2)
   memoise                2.0.1     2021-11-26 [1] CRAN (R 4.1.0)
   metapod                1.2.0     2021-10-26 [1] Bioconductor
   MetBrewer              0.2.0     2022-03-21 [1] CRAN (R 4.1.2)
   mime                   0.12      2021-09-28 [1] CRAN (R 4.1.0)
   miniUI                 0.1.1.1   2018-05-18 [1] CRAN (R 4.1.0)
   msa                    1.26.0    2021-10-26 [1] Bioconductor
   munsell                0.5.0     2018-06-12 [1] CRAN (R 4.1.0)
 R niar                 * 0.3.0     <NA>       [?] <NA>
   nlme                   3.1-163   2023-08-09 [1] CRAN (R 4.1.2)
   org.Mm.eg.db         * 3.14.0    2023-11-15 [1] Bioconductor
   patchwork            * 1.1.3     2023-08-14 [1] CRAN (R 4.1.2)
   pheatmap             * 1.0.12    2019-01-04 [1] CRAN (R 4.1.0)
   pillar                 1.9.0     2023-03-22 [1] CRAN (R 4.1.2)
   pkgbuild               1.4.2     2023-06-26 [1] CRAN (R 4.1.2)
   pkgconfig              2.0.3     2019-09-22 [1] CRAN (R 4.1.0)
   pkgload                1.3.2.1   2023-07-08 [1] CRAN (R 4.1.2)
   plyr                   1.8.8     2022-11-11 [1] CRAN (R 4.1.2)
   png                    0.1-8     2022-11-29 [1] CRAN (R 4.1.2)
   polyclip               1.10-4    2022-10-20 [1] CRAN (R 4.1.2)
   prettyunits            1.1.1     2020-01-24 [1] CRAN (R 4.1.0)
   processx               3.8.2     2023-06-30 [1] CRAN (R 4.1.2)
   profvis                0.3.8     2023-05-02 [1] CRAN (R 4.1.2)
   progress               1.2.2     2019-05-16 [1] CRAN (R 4.1.0)
   promises               1.2.1     2023-08-10 [1] CRAN (R 4.1.2)
   ps                     1.7.5     2023-04-18 [1] CRAN (R 4.1.2)
   psychTools             2.3.6     2023-06-18 [1] CRAN (R 4.1.2)
   purrr                  1.0.2     2023-08-10 [1] CRAN (R 4.1.2)
   qvalue                 2.26.0    2021-10-26 [1] Bioconductor
   R6                     2.5.1     2021-08-19 [1] CRAN (R 4.1.0)
   rappdirs               0.3.3     2021-01-31 [1] CRAN (R 4.1.0)
   RColorBrewer           1.1-3     2022-04-03 [1] CRAN (R 4.1.2)
   Rcpp                   1.0.11    2023-07-06 [1] CRAN (R 4.1.2)
   RCurl                  1.98-1.12 2023-03-27 [1] CRAN (R 4.1.2)
   readr                * 2.1.4     2023-02-10 [1] CRAN (R 4.1.2)
   remotes                2.4.2.1   2023-07-18 [1] CRAN (R 4.1.2)
   reshape2               1.4.4     2020-04-09 [1] CRAN (R 4.1.0)
   rlang                  1.1.1     2023-04-28 [1] CRAN (R 4.1.2)
   rmarkdown              2.24      2023-08-14 [1] CRAN (R 4.1.2)
   rprojroot              2.0.3     2022-04-02 [1] CRAN (R 4.1.2)
   Rsamtools              2.10.0    2021-10-26 [1] Bioconductor
   RSQLite                2.3.1     2023-04-03 [1] CRAN (R 4.1.2)
   rstatix              * 0.7.2     2023-02-01 [1] CRAN (R 4.1.2)
   rstudioapi             0.15.0    2023-07-07 [1] CRAN (R 4.1.2)
   S4Vectors            * 0.32.4    2022-03-29 [1] Bioconductor
   sass                   0.4.7     2023-07-15 [1] CRAN (R 4.1.2)
   scales                 1.2.1     2022-08-20 [1] CRAN (R 4.1.2)
   scatterpie             0.2.1     2023-06-07 [1] CRAN (R 4.1.2)
   seqinr                 4.2-30    2023-04-05 [1] CRAN (R 4.1.2)
   sessioninfo            1.2.2     2021-12-06 [1] CRAN (R 4.1.0)
   shadowtext             0.1.2     2022-04-22 [1] CRAN (R 4.1.2)
   shiny                  1.7.5     2023-08-12 [1] CRAN (R 4.1.2)
   stringi                1.7.12    2023-01-11 [1] CRAN (R 4.1.2)
   stringr              * 1.5.0     2022-12-02 [1] CRAN (R 4.1.2)
   SummarizedExperiment   1.24.0    2021-10-26 [1] Bioconductor
   survival               3.5-7     2023-08-14 [1] CRAN (R 4.1.2)
   tibble               * 3.2.1     2023-03-20 [1] CRAN (R 4.1.2)
   tidygraph              1.2.3     2023-02-01 [1] CRAN (R 4.1.2)
   tidyr                * 1.3.0     2023-01-24 [1] CRAN (R 4.1.2)
   tidyselect             1.2.0     2022-10-10 [1] CRAN (R 4.1.2)
   tidytree               0.4.5     2023-08-10 [1] CRAN (R 4.1.2)
   treeio                 1.18.1    2021-11-14 [1] Bioconductor
   tweenr                 2.0.2     2022-09-06 [1] CRAN (R 4.1.2)
   tzdb                   0.4.0     2023-05-12 [1] CRAN (R 4.1.2)
   UpSetR               * 1.4.0     2019-05-22 [1] CRAN (R 4.1.0)
   urlchecker             1.0.1     2021-11-30 [1] CRAN (R 4.1.0)
   usethis                2.2.2     2023-07-06 [1] CRAN (R 4.1.2)
   utf8                   1.2.3     2023-01-31 [1] CRAN (R 4.1.2)
   vctrs                  0.6.3     2023-06-14 [1] CRAN (R 4.1.2)
   vipor                  0.4.5     2017-03-22 [1] CRAN (R 4.1.0)
   viridis                0.6.4     2023-07-22 [1] CRAN (R 4.1.2)
   viridisLite            0.4.2     2023-05-02 [1] CRAN (R 4.1.2)
   vroom                  1.6.3     2023-04-28 [1] CRAN (R 4.1.2)
   withr                  2.5.0     2022-03-03 [1] CRAN (R 4.1.2)
   xfun                   0.40      2023-08-09 [1] CRAN (R 4.1.2)
   XICOR                  0.4.1     2023-04-21 [1] CRAN (R 4.1.2)
   XML                    3.99-0.14 2023-03-19 [1] CRAN (R 4.1.2)
   xml2                   1.3.5     2023-07-06 [1] CRAN (R 4.1.2)
   xtable                 1.8-4     2019-04-21 [1] CRAN (R 4.1.0)
   XVector                0.34.0    2021-10-26 [1] Bioconductor
   yaml                   2.3.7     2023-01-23 [1] CRAN (R 4.1.2)
   yulab.utils            0.0.8     2023-08-22 [1] CRAN (R 4.1.2)
   zlibbioc               1.40.0    2021-10-26 [1] Bioconductor

 [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library

 R ── Package was removed from disk.

──────────────────────────────────────────────────────────────────────────────